LaTeX pseudocode is located here. "English" pseudocode is located here (this is from before I knew how to use LaTeX).
Also, we will need this for the implementation of kernel probability density functions: KDE Python Notebook (code.md approved)
As of 11/26/2016:
Adding Kurtosis Python Notebook as well as Spectral Analysis
Both pseudocodes can be found in the updated LaTeX in the file above.
For all sine waves created: make sure number of points = 1000 sampled from each wave representing data.
Success:
Hopeful:
Fail:
Basically, the test is whether we are able to detect the poor waves, so that the final dataset does not contain the really poor waves.
Start by generating each set of data from above.
import numpy as np
# Fix random seed so every white-noise draw below is reproducible
initseed = 123456789
np.random.seed(initseed)
# Samples per wave (per the spec above: 1000 points from each wave)
numvals = 1000
# First, build the relevant linspace to grab 1000 points
times = np.linspace(0, 1, numvals)
# Then define the general sine wave used throughout
# (one full period of a unit-amplitude sine sampled at numvals points)
sin = np.sin(2 * np.pi * times)
def gen_whitenoise(mean, std, size):
    """Draw `size` samples of Gaussian white noise.

    Parameters
    ----------
    mean : float -- center of the distribution
    std : float -- standard deviation (the noise "strength")
    size : int -- number of samples to draw

    Returns
    -------
    ndarray of shape (size,)
    """
    return np.random.normal(mean, std, size=size)
# Success Case 1 Data
# 50 of the same sine waves
success1 = np.column_stack([sin] * 50)
#print success1.shape
# Success Case 2 Data
# 50 same sine waves, 1 with white noise
success2 = np.column_stack([sin] * 50)
# Strong noise (std = 10) relative to the unit-amplitude sine
wn = gen_whitenoise(0, 10, numvals)
# Column 4 (the 5th wave) is the corrupted electrode; .T is a no-op on
# a 1-D array
success2[:, 4] = success2[:, 4] + wn.T
#print success2[0]
# Success Case 3 Data
# 50 different amplitude sine waves, 1 with white noise
# Amplitudes 1 through 5, ten electrodes each
success3 = np.column_stack([sin] * 10 +
[sin * 2] * 10 +
[sin * 3] * 10 +
[sin * 4] * 10 +
[sin * 5] * 10)
# NOTE(review): reuses the SAME noise vector `wn` as success2 --
# confirm independent noise isn't wanted here
success3[:, 49] = success3[:, 49] + wn.T
#print success3[0]
#print success3[1]
# Hopeful Case 1 Data
# 40 sine waves, 32 each with small amounts of white noise, 8 with a lot
hope1 = np.column_stack([sin] * 40)
# Electrodes 0-31: mild noise (std 0.25)
for i in range(0, 32):
    hope1[:, i] += gen_whitenoise(0, 0.25, numvals)
# Electrodes 32-39: heavy noise (std 20)
for i in range (32, 40):
    hope1[:, i] += gen_whitenoise(0, 20, numvals)
#print hope1[:, 0]
# Hopeful Case 2 Data
# 40 sine waves, 32 each with small amounts of white noise, 8 with a lot
hope2 = np.column_stack([sin] * 40)
for i in range(0, 32):
    hope2[:, i] += gen_whitenoise(0, 0.25, numvals)
# Same layout as hope1 but the heavy noise is weaker (std 10)
for i in range (32, 40):
    hope2[:, i] += gen_whitenoise(0, 10, numvals)
#print hope2[:, 0]
# Hopeful Case 3 Data
# 40 sine waves, 32 each with small amounts of white noise, 8 with a lot
hope3 = np.column_stack([sin] * 40)
for i in range(0, 32):
    hope3[:, i] += gen_whitenoise(0, 0.25, numvals)
# Heavy noise weaker still (std 5)
for i in range (32, 40):
    hope3[:, i] += gen_whitenoise(0, 5, numvals)
#print hope3[:, 0]
# Fail Case 1 Data
# 50 same sine waves, 40 with white noise
fail1 = np.column_stack([sin] * 50)
# 40 of 50 electrodes noisy: too many outliers for z-score detection,
# hence the expected "fail"
for i in range(0, 40):
    fail1[:, i] = fail1[:, i] + gen_whitenoise(0, 2, numvals)
#print fail1[0]
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly.graph_objs import *
from plotly import tools
init_notebook_mode()
# The six plotting cells below were identical except for the dataset,
# the number of traces, and the title, so the duplication is factored
# into one helper.  (The dead `data = [datasets[:]]` assignments from
# the original cells were never read and are dropped.)
def _plot_samples(dataset, ncols, title):
    """Render the first `ncols` columns of `dataset` against `times`.

    Parameters
    ----------
    dataset : ndarray -- 2-D array, one wave per column
    ncols : int -- number of leading columns to plot
    title : str -- figure title

    Side effect: draws an inline plotly figure via iplot().
    """
    traces = [Scatter(x=times,
                      y=dataset[:, i],
                      name='Sample ' + str(i))
              for i in range(ncols)]
    layout = dict(title=title,
                  xaxis=dict(title='Time'),
                  yaxis=dict(title='Unit'))
    # plotly accepts a plain dict as a figure object
    iplot(dict(data=traces, layout=layout))

# Success 1: 5 of the 50 identical sine waves
_plot_samples(success1, 5, 'Success 1')
# Success 2: first 5 waves (the noisy electrode is column 4)
_plot_samples(success2, 5, 'Success 2')
# Success 3: all 50 varying-amplitude waves
_plot_samples(success3, success3.shape[1], 'Success 3')
# Hopeful 1-3: first 10 waves of each noise mix
_plot_samples(hope1, 10, 'Hope 1')
_plot_samples(hope2, 10, 'Hope 2')
_plot_samples(hope3, 10, 'Hope 3')
# Fail 1: first 5 waves
_plot_samples(fail1, 5, 'Fail 1')
# Reshape Data: Run with no reshaping
electrodes = 0
trials = 0
times = 0
print len(success1.shape)
def reshape(inEEG):
    """Collapse the trials axis of an EEG array into the time axis.

    Parameters
    ----------
    inEEG : ndarray
        A (times, electrodes, trials) array is flattened to
        (times * trials, electrodes); 1-D and 2-D arrays pass through
        unchanged.

    Returns
    -------
    ndarray -- the collapsed 2-D array, or the input itself for 1-D/2-D.

    Raises
    ------
    ValueError -- for any other rank.  (The original printed "fail" and
        implicitly returned None, which made callers crash on `.shape`;
        its local electrodes/times/trials assignments were never read
        and are removed.)
    """
    ndim = len(inEEG.shape)
    if ndim == 3:
        t, e, r = inEEG.shape
        # Stack the trials one after another along the time axis
        return np.reshape(inEEG, (t * r, e))
    if ndim in (1, 2):
        return inEEG
    raise ValueError("inEEG must be 1-, 2-, or 3-D; got %d-D" % ndim)
print "Final dimensions:", reshape(success1).shape
# Reshape Data: Run with reshaping
success1 = np.expand_dims(success1, 2)
print len(success1.shape)
print success1.shape
print "Final dimensions:", reshape(success1).shape
success1 = reshape(success1)
# Reshape Data: run with data where reshaping actually matters
dummy = np.dstack([success1] * 2)
print dummy.shape
print dummy[:,:,1].shape
print "Final dimensions:", reshape(dummy).shape
Kernel density probability distribution
# Now define the probability density functions tested in other notebooks
from sklearn.neighbors.kde import KernelDensity
from sklearn.grid_search import GridSearchCV
# Run the KDE!
def kdewrap(indata, kernel):
    """Return the per-sample log-likelihoods of `indata` under a KDE.

    The bandwidth is chosen by 10-fold cross-validation over a grid of
    30 values in [0.1, 1.0]; the final density uses the given `kernel`.

    Parameters
    ----------
    indata : ndarray -- 1-D array of samples
    kernel : str -- kernel name accepted by KernelDensity (e.g. 'gaussian')

    Returns
    -------
    ndarray -- log-density evaluated at each input sample
    """
    samples = indata[:, np.newaxis]
    # Tune the bandwidth with 10-fold cross-validation
    search = GridSearchCV(KernelDensity(),
                          {'bandwidth': np.linspace(0.1, 1.0, 30)},
                          cv=10)
    search.fit(samples)
    best_bw = search.best_params_["bandwidth"]
    # Refit with the requested kernel at the tuned bandwidth
    kde = KernelDensity(kernel=kernel, bandwidth=best_bw).fit(samples)
    return kde.score_samples(samples)
Define what happens inside the loop in which the KDE will execute.
# Grab a single (regular, noise-free) electrode to sanity-check the KDE
regelec = success1[:, 0]
#print regelec
# Per-sample log-likelihoods under the CV-tuned gaussian KDE
probdist = kdewrap(regelec, 'gaussian')
#print probdist
# get joint prob with assumption in log-likelihood format
# leave in log-likelihood to prevent underflow
jointprob = np.sum(probdist)
print jointprob
These are really all the modular, testable parts of the code. Now we will combine them into the full algorithm plus the ending section.
def prob_baddetec(inEEG, threshold, probfunc):
    """Detect bad electrodes by joint log-likelihood under a KDE.

    Each electrode's joint log-likelihood (sum of per-sample log
    densities) is z-scored across electrodes; outliers are flagged.

    Parameters
    ----------
    inEEG : ndarray -- (times, electrodes) or (times, electrodes, trials);
        a 1-D array is treated as a single electrode
    threshold : float -- z-score cutoff for flagging
    probfunc : callable -- probfunc(samples, 'gaussian') -> per-sample
        log-likelihoods (e.g. kdewrap)

    Returns
    -------
    list of flagged electrode indices, or -1 for unsupported array rank
    """
    ndim = len(inEEG.shape)
    # Start by reshaping data (if necessary): collapse trials into time
    if ndim == 3:
        inEEG = np.reshape(inEEG, (inEEG.shape[0] * inEEG.shape[2], inEEG.shape[1]))
    elif ndim == 1:
        # single electrode -> one column
        inEEG = inEEG[:, np.newaxis]
    elif ndim != 2:
        # fail case
        return -1
    # BUG FIX: electrodes is read AFTER reshaping; the original read
    # shape[1] first, which raised IndexError on 1-D input
    electrodes = inEEG.shape[1]
    # joint log-likelihood per electrode
    probvec = np.zeros(electrodes)
    for i in range(0, electrodes):
        probvec[i] = np.sum(probfunc(inEEG[:, i], 'gaussian'))
    # z-score the joint likelihoods across electrodes
    avg = np.mean(probvec)
    stddev = np.std(probvec)
    if stddev == 0:
        # all electrodes identical in likelihood: nothing stands out
        # (also avoids the 0/0 warning; original returned [] here too)
        return []
    badelec = []
    for i in range(0, len(probvec)):
        if abs((avg - probvec[i]) / stddev) >= threshold:
            badelec.append(i)
    return badelec
from scipy.stats import kurtosis
def kurt_baddetec(inEEG, threshold):
    """Detect bad electrodes by the kurtosis of their time series.

    Each electrode's kurtosis is z-scored across electrodes; an
    electrode `threshold` or more standard deviations from the mean
    kurtosis is flagged.

    Parameters
    ----------
    inEEG : ndarray -- (times, electrodes) or (times, electrodes, trials);
        a 1-D array is treated as a single electrode
    threshold : float -- z-score cutoff for flagging

    Returns
    -------
    list of flagged electrode indices, or -1 for unsupported array rank
    """
    ndim = len(inEEG.shape)
    # Start by reshaping data (if necessary): collapse trials into time
    if ndim == 3:
        inEEG = np.reshape(inEEG, (inEEG.shape[0] * inEEG.shape[2], inEEG.shape[1]))
    elif ndim == 1:
        inEEG = inEEG[:, np.newaxis]
    elif ndim != 2:
        # fail case
        return -1
    # BUG FIX: electrodes is read AFTER reshaping; the original read
    # shape[1] first, which raised IndexError on 1-D input
    electrodes = inEEG.shape[1]
    # kurtosis per electrode
    kurtvec = np.zeros(electrodes)
    for i in range(0, electrodes):
        kurtvec[i] = kurtosis(inEEG[:, i])
    # z-score the kurtoses across electrodes
    avg = np.mean(kurtvec)
    stddev = np.std(kurtvec)
    if stddev == 0:
        # identical kurtoses: nothing stands out (avoids 0/0 warning)
        return []
    badelec = []
    for i in range(0, len(kurtvec)):
        if abs((avg - kurtvec[i]) / stddev) >= threshold:
            badelec.append(i)
    return badelec
def spec_baddetec(inEEG, posthresh, negthresh):
    """Detect bad electrodes from the real part of their FFT spectrum.

    Each electrode's spectrum (real component, mean-subtracted) is
    scanned; a single bin above `posthresh` or below `negthresh` flags
    the electrode.

    Parameters
    ----------
    inEEG : ndarray -- (times, electrodes) or (times, electrodes, trials);
        a 1-D array is treated as a single electrode
    posthresh, negthresh : float -- upper/lower spectral power bounds

    Returns
    -------
    list of flagged electrode indices, or -1 for unsupported array rank
    """
    ndim = len(inEEG.shape)
    # Start by reshaping data (if necessary): collapse trials into time
    if ndim == 3:
        inEEG = np.reshape(inEEG, (inEEG.shape[0] * inEEG.shape[2], inEEG.shape[1]))
    elif ndim == 1:
        inEEG = inEEG[:, np.newaxis]
    elif ndim != 2:
        # fail case
        return -1
    # BUG FIX: electrodes is read AFTER reshaping; the original read
    # shape[1] first, which raised IndexError on 1-D input
    electrodes = inEEG.shape[1]
    badelec = []
    for i in range(0, electrodes):
        # real component of the frequency spectrum, mean-removed
        sp = np.fft.fft(inEEG[:, i]).real
        sp = sp - np.mean(sp)
        # one out-of-bounds bin is enough to flag the electrode
        if np.any((sp > posthresh) | (sp < negthresh)):
            badelec.append(i)
    return badelec
def good_elec(inEEG, badelec):
    """Return a copy of `inEEG` with the bad electrode columns removed.

    `badelec` is a sequence of column indices (axis 1) to drop.
    """
    return np.delete(inEEG, badelec, axis=1)
def qual_eval(badelec, expected):
    """Print expected vs. detected bad electrodes for visual comparison."""
    print "Expected:", expected
    print "Actual:", badelec
def qual_plot(data, title):
    """Plot every electrode column of `data` against the global `times` axis.

    Parameters
    ----------
    data : ndarray -- 2-D array, one electrode per column
    title : str -- figure title

    Side effect: renders an inline plotly figure via iplot().
    """
    # NOTE(review): `times` may have been reset to 0 earlier in the
    # script; confirm the time axis is still the linspace when this runs.
    # BUG FIX: the original rebound the `data` parameter to an unused
    # list (`data = [datasets[:]]`) -- dead code, removed.
    traces = [Scatter(x=times,
                      y=data[:, i],
                      name='Sample ' + str(i))
              for i in range(data.shape[1])]
    layout = dict(title=title,
                  xaxis=dict(title='Time'),
                  yaxis=dict(title='Unit'))
    fig = dict(data=traces, layout=layout)
    iplot(fig)
Just check that the correct number of bad electrodes were removed, and which ones were removed.
def quant_eval(badelec, expected):
    """Return True iff the detected bad electrodes match the expected set.

    Order and duplicates are ignored.  BUG FIX: the original body
    compared undefined names `x` and `y` and returned the undefined
    names `true`/`false`, so every call raised NameError.
    """
    return set(badelec) == set(expected)
#dummybad_prob = prob_baddetec(success3, 3, kdewrap)
#dummybad_kurt = kurt_baddetec(success3, 3)
#dummybad_spec = spec_baddetec(success3, 10, -50)
#print dummybad_prob
#print dummybad_kurt
#print dummybad_spec
# First run on all datasets
# Probability-based detector at the 3-sigma threshold
s1probbad = prob_baddetec(success1, 3, kdewrap)
s2probbad = prob_baddetec(success2, 3, kdewrap)
s3probbad = prob_baddetec(success3, 3, kdewrap)
# NOTE(review): `h1pobbad` is presumably a typo for h1probbad, but it is
# used consistently below, so the name is left unchanged
h1pobbad = prob_baddetec(hope1, 3, kdewrap)
f1probbad = prob_baddetec(fail1, 3, kdewrap)
# Success 1: union the flags of all three detectors
s1bad = set(s1probbad)
s1kurtbad = kurt_baddetec(success1, 3)
s1bad.update(s1kurtbad)
s1specbad = spec_baddetec(success1, 50, -50)
s1bad.update(s1specbad)
print s1probbad
print s1kurtbad
print s1specbad
print s1bad
# Success 2: union the three detectors' flags at the 3-sigma threshold
s2bad = set(s2probbad)
s2kurtbad = kurt_baddetec(success2, 3)
s2bad.update(s2kurtbad)
s2specbad = spec_baddetec(success2, 50, -50)
s2bad.update(s2specbad)
print s2probbad
print s2kurtbad
print s2specbad
print s2bad
s3bad = set(s3probbad)
s3kurtbad = kurt_baddetec(success3, 3)
s3bad.update(s3kurtbad)
s3specbad = spec_baddetec(success3, 50, -50)
s3bad.update(s3specbad)
print s2probbad
print s2kurtbad
print s2specbad
print s2bad
# Hopeful 1: union the three detectors' flags at the 3-sigma threshold
h1bad = set(h1pobbad)
h1kurtbad = kurt_baddetec(hope1, 3)
h1bad.update(h1kurtbad)
h1specbad = spec_baddetec(hope1, 50, -50)
h1bad.update(h1specbad)
print h1pobbad
print h1kurtbad
print h1specbad
print h1bad
#h1prob13 = prob_baddetec(hope1, 1.5, kdewrap)
#h1prob4 = prob_baddetec(hope1, 2, kdewrap)
#h1prob1 = prob_baddetec(hope1, 2.5, kdewrap)
#h1prob0 = prob_baddetec(hope1, 3, kdewrap)
print "13 percent rejected", h1prob13
print "4 percent rejected", h1prob4
print "1 percent rejected", h1prob1
print ".27 percent rejected", h1prob0
# Threshold sweep of the kurtosis detector on Hopeful 1; the labels give
# the fraction of a normal distribution beyond each z cutoff
h1kurt13 = kurt_baddetec(hope1, 1.5)
h1kurt4 = kurt_baddetec(hope1, 2)
h1kurt1 = kurt_baddetec(hope1, 2.5)
h1kurt0 = kurt_baddetec(hope1, 3)
print "13 percent rejected", h1kurt13
print "4 percent rejected", h1kurt4
print "1 percent rejected", h1kurt1
print ".27 percent rejected", h1kurt0
# Fail 1: union the three detectors' flags at the 3-sigma threshold
f1bad = set(f1probbad)
f1kurtbad = kurt_baddetec(fail1, 3)
f1bad.update(f1kurtbad)
f1specbad = spec_baddetec(fail1, 50, -50)
f1bad.update(f1specbad)
print f1probbad
print f1kurtbad
print f1specbad
print f1bad
# Threshold sweeps on Fail 1 for the probability and kurtosis detectors;
# the labels give the fraction of a normal distribution beyond each cutoff
f1prob13 = prob_baddetec(fail1, 1.5, kdewrap)
f1prob4 = prob_baddetec(fail1, 2, kdewrap)
f1prob1 = prob_baddetec(fail1, 2.5, kdewrap)
f1prob0 = prob_baddetec(fail1, 3, kdewrap)
print "13 percent rejected", f1prob13
print "4 percent rejected", f1prob4
print "1 percent rejected", f1prob1
print ".27 percent rejected", f1prob0
f1kurt13 = kurt_baddetec(fail1, 1.5)
f1kurt4 = kurt_baddetec(fail1, 2)
f1kurt1 = kurt_baddetec(fail1, 2.5)
f1kurt0 = kurt_baddetec(fail1, 3)
print "13 percent rejected", f1kurt13
print "4 percent rejected", f1kurt4
print "1 percent rejected", f1kurt1
print ".27 percent rejected", f1kurt0
# Qualitative evaluation: print expected vs. detected bad electrodes,
# then plot each dataset with its flagged electrodes removed
qual_eval(list(s1bad), [])
qual_plot(good_elec(success1, list(s1bad)), "Success 1")
qual_eval(list(s2bad), [4])
qual_plot(good_elec(success2, list(s2bad)), "Success 2")
qual_eval(list(s3bad), [49])
qual_plot(good_elec(success3, list(s3bad)), "Success 3")
# just with prob baddetec 2.5 [32, 33, 37, 38]
qual_eval(list(h1bad), range(32,40))
qual_plot(good_elec(hope1, list(h1bad)), "Hope 1")
qual_eval(list(f1bad), [])
qual_plot(good_elec(fail1, list(f1bad)), "Fail 1")
Honestly, anything gained from the quantitative evaluation was already obtained earlier.
Unfortunately, what I expected to happen didn't exactly happen. At 3 standard deviations away (a standard borrowed from what the researchers used in their own probability implementation), none of the noisy data was flagged. Attempting to modify the noise injected into the data by changing the scale at which it was added (i.e., changing the standard deviation from 1 to 2 to 3) had literally no effect on the outcome. What makes sense is that, because the number of white-noise values being used is so high, they accurately depict a normal distribution, and if they depict a normal distribution, the distribution eventually modeled by the kernel probability estimate is essentially normal as well. I'm not sure of the best way to deal with this problem (or whether I should fix the simulations instead), but I will ask Jovo for help before moving further.